In [1]:
import warnings
# Silence all library warnings notebook-wide (repeated again in the imports cell below)
warnings.filterwarnings('ignore')
In [2]:
# Importación librerías
import pandas as pd
import numpy as np
import keras
import tensorflow as tf

from numpy import array
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.preprocessing.sequence import TimeseriesGenerator

import os

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import optuna
from optuna import Trial
from sklearn import metrics
from keras.callbacks import EarlyStopping,ReduceLROnPlateau

from sklearn.preprocessing import MinMaxScaler,StandardScaler

from pandas_datareader import data as pdr
import yfinance as yfin
# Monkey-patch pandas_datareader so Yahoo downloads go through yfinance.
# NOTE(review): pdr/yfin are never used below — all data comes from local
# Excel files; confirm whether this cell is still needed.
yfin.pdr_override()
In [3]:
def _load_returns(path):
    """Read a smoothed-returns sheet indexed by 'Date' and strip the
    '.ReturnSuavizado' suffix from every column name."""
    df = pd.read_excel(path, index_col='Date')
    df.columns = [c.replace('.ReturnSuavizado', '') for c in df.columns]
    return df

# Train / validation / test splits prepared in a previous notebook
data_train = _load_returns('dftrain.xlsx')
data_val = _load_returns('dftest.xlsx')
data_test = _load_returns('dftestall.xlsx')
In [4]:
# Train + validation combined, used to refit the final model before testing
data_trainall = pd.concat([data_train, data_val])
data_trainall
Out[4]:
GSPC AAPL AMZN MSTF TSLA GOOG GOOGL NVDA BRK.B META UNH JNJ PG VIX DolarIndex
Date
2012-05-21 1.746909 3.490283 1.070000 1.075062 1.356247 1.510118 1.493379 0.655306 1.049199 1.510658 2.073412 0.155280 -0.270027 -1.810652 -0.199267
2012-05-22 -0.007725 -0.534660 -0.767095 -0.020680 2.180645 -1.547496 -1.525078 -0.556728 -0.228852 1.510658 0.172248 0.036174 -0.458769 0.327256 0.020139
2012-05-23 0.129489 1.455819 0.465606 -1.562769 0.184059 0.941975 0.932516 0.950053 0.077185 1.510658 -0.540879 -0.473359 -1.358408 -0.062811 2.697792
2012-05-24 0.093857 -0.629651 -0.575519 -0.138388 -0.838114 -0.698745 -0.687197 -1.156537 0.015914 1.507333 1.096323 0.705074 0.263350 -0.473912 -0.331184
2012-05-25 -0.309696 -0.388628 -0.662707 -0.067466 -0.560168 -1.436628 -1.415629 0.919416 -0.720151 -1.709279 -0.189339 -1.060502 -0.186788 0.173503 0.474968
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2023-01-25 -0.083719 -0.347177 0.458486 -0.451382 0.073811 -1.781387 -1.782862 0.071015 0.142652 -0.592635 0.080526 0.716622 -0.124685 -0.056849 -0.374998
2023-01-26 1.182492 0.866959 1.129257 2.037044 3.353924 1.663595 1.580865 0.953151 -0.342923 1.920064 -0.059264 -0.443317 -0.484419 -0.228505 0.105223
2023-01-27 0.220445 0.797927 1.648946 0.000575 3.363623 1.024389 1.236573 1.098853 -0.605462 1.410943 -1.038815 -0.470380 -0.493864 -0.134595 -0.300968
2023-01-30 -1.548164 -1.321312 -0.983807 -1.571251 -2.182643 -1.951333 -1.718173 -2.552922 -0.627754 -1.555253 -0.096230 0.210523 0.289765 1.074298 -0.181023
2023-01-31 1.590772 0.509451 1.386626 1.386587 1.213647 1.292552 1.278452 0.743473 1.276171 0.594704 1.978149 0.891425 0.997751 -0.353844 0.192047

2792 rows × 15 columns

In [5]:
# First-difference the S&P 500 series and drop the leading NaN for the PACF
datapacf = data_train.GSPC.diff().iloc[1:]
In [7]:
import matplotlib as mpl
from matplotlib import pyplot
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation up to 50 lags, to gauge how many lagged
# features are worth engineering below.
with mpl.rc_context({'figure.figsize': (10, 5)}):
    plot_pacf(datapacf, lags=50)
In [8]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise Pearson correlations across all series, used to pick a
# low-collinearity feature subset in the next cell.
plt.figure(figsize=(16, 6))
ax = sns.heatmap(data_train.corr(), vmin=-1, vmax=1, annot=True)
ax.set_title('Correlation Heatmap', fontdict={'fontsize': 12}, pad=12)
Out[8]:
Text(0.5, 1.0, 'Correlation Heatmap')
In [9]:
# Restrict every split to the selected feature subset (target GSPC first)
KEEP_COLS = ['GSPC', 'MSTF', 'BRK.B', 'DolarIndex']
data_train = data_train[KEEP_COLS]
data_val = data_val[KEEP_COLS]
data_test = data_test[KEEP_COLS]
data_trainall = data_trainall[KEEP_COLS]
In [10]:
def shift_data(df, serie_name, period):
    """Append `period` lagged copies of column `serie_name` to `df` in place.

    Each new column is named '<serie_name>.<lag>' and holds the series
    shifted down by `lag` rows, so its first `lag` entries are NaN.
    """
    for lag in range(1, period + 1):
        df.insert(len(df.columns), f'{serie_name}.{lag}',
                  df[serie_name].shift(periods=lag))

def _add_lags(df, period=5):
    """Add `period` lags of every original column via shift_data, then drop
    the rows made incomplete by the shifting."""
    # Snapshot the columns first: shift_data appends new ones as it runs.
    for col in list(df.columns):
        shift_data(df, col, period)
    return df.dropna()

# Same lag engineering applied to every split independently
data_train = _add_lags(data_train)
data_val = _add_lags(data_val)
data_test = _add_lags(data_test)
data_trainall = _add_lags(data_trainall)
In [11]:
# Min-max scale each split; target is column 0 (GSPC), features are the rest.
# One scaler is fit on train (val reuses it); a second is fit on train+val
# and reused for the test split.
scaler = MinMaxScaler()
train_scaled = pd.DataFrame(scaler.fit_transform(data_train), columns=data_train.columns)
xtrain, ytrain = train_scaled.iloc[:, 1:], train_scaled.iloc[:, 0]

val_scaled = pd.DataFrame(scaler.transform(data_val), columns=data_val.columns)
xval, yval = val_scaled.iloc[:, 1:], val_scaled.iloc[:, 0]

scaler_all = MinMaxScaler()
trainall_scaled = pd.DataFrame(scaler_all.fit_transform(data_trainall), columns=data_trainall.columns)
xtrainall, ytrainall = trainall_scaled.iloc[:, 1:], trainall_scaled.iloc[:, 0]

test_scaled = pd.DataFrame(scaler_all.transform(data_test), columns=data_test.columns)
xtest, ytest = test_scaled.iloc[:, 1:], test_scaled.iloc[:, 0]
In [12]:
# Row counts after lagging/dropna: train, val, test, train+val
for features in (xtrain, xval, xtest, xtrainall):
    print(features.shape[0])
2767
15
15
2787
In [13]:
# Keep each split's date index before outlier removal filters rows below
traindates = data_train.index
valdates = data_val.index
testdates = data_test.index
traindatesall = data_trainall.index
In [14]:
from sklearn.neighbors import LocalOutlierFactor

# Drop multivariate outliers from the training rows (LOF labels outliers -1),
# keeping the date indexes aligned with the filtered rows.
lof = LocalOutlierFactor()
mask = lof.fit_predict(xtrain) != -1
xtrain_no, ytrain_no, traindates = xtrain[mask], ytrain[mask], traindates[mask]

lof = LocalOutlierFactor()
mask = lof.fit_predict(xtrainall) != -1
# BUG FIX: this line previously rebound `traindates`, clobbering the
# train-split dates filtered just above and desynchronizing the x-axis of
# the forecast plot at the end of the notebook.
xtrainall_no, ytrainall_no, traindatesall = xtrainall[mask], ytrainall[mask], traindatesall[mask]
In [15]:
def _to_lstm_arrays(x_df, y_ser, n_features):
    """Reshape a feature frame / target series into the (samples, 1, features)
    and (samples, 1, 1) 3-D layouts Keras LSTM layers expect."""
    return (x_df.to_numpy().reshape(-1, 1, n_features),
            y_ser.to_numpy().reshape(-1, 1, 1))

xtrain_n, ytrain_n = _to_lstm_arrays(xtrain_no, ytrain_no, data_train.shape[1] - 1)
xval_n, yval_n = _to_lstm_arrays(xval, yval, data_val.shape[1] - 1)
xtest_n, ytest_n = _to_lstm_arrays(xtest, ytest, data_test.shape[1] - 1)
xtrainall_n, ytrainall_n = _to_lstm_arrays(xtrainall_no, ytrainall_no, data_trainall.shape[1] - 1)
In [16]:
import random
# Fixed seed so training runs are reproducible
seed = 128
def random_seed(seed):
    # Seed every RNG source Keras/TensorFlow training depends on
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
random_seed(seed)
In [17]:
def objective(trial):
    """Optuna objective: sample an LSTM architecture + training config,
    train for 50 epochs and return the best validation MSE seen."""
    keras.backend.clear_session()
    n_features = data_train.shape[1] - 1
    n_layers = trial.suggest_int('n_layers', 1, 4)
    model = keras.Sequential()
    for i in range(n_layers):
        units = trial.suggest_int(f'n_units_l{i}', n_features, 400, log=True)
        act = trial.suggest_categorical(f'activation{i}', ['relu', 'linear', 'swish', 'sigmoid'])
        model.add(keras.layers.LSTM(units, input_shape=(1, n_features),
                                    return_sequences=True, activation=act))
        model.add(keras.layers.Dropout(rate=trial.suggest_float(f'dropout{i}', 0.0, 0.5)))
    final_act = trial.suggest_categorical('finalact', ['relu', 'linear', 'swish', 'sigmoid'])
    model.add(keras.layers.Dense(1, activation=final_act))
    # Learning-rate schedule is tuned alongside the architecture
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=trial.suggest_float('LRfactor', 0.0, 0.5),
                                  patience=trial.suggest_int('LRpatience', 5, 20),
                                  min_lr=1e-05, verbose=0)
    model.compile(loss='mse',
                  optimizer=trial.suggest_categorical('optimizer', ['Adagrad', 'adam', 'sgd', 'RMSprop']))
    run_history = model.fit(xtrain_n, ytrain_n, validation_data=(xval_n, yval_n),
                            epochs=50, callbacks=[reduce_lr], verbose=0)
    return min(run_history.history['val_loss'])
In [26]:
# Run the hyperparameter search (30 trials, 30-minute cap)
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30, timeout=1800)

print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
[I 2023-05-04 21:39:04,328] A new study created in memory with name: no-name-2f726e47-3c77-4804-8b9a-7d9d1ced5527
[I 2023-05-04 21:39:33,936] Trial 0 finished with value: 0.03469480574131012 and parameters: {'n_layers': 2, 'n_units_l0': 63, 'activation0': 'sigmoid', 'dropout0': 0.4497758295784005, 'n_units_l1': 36, 'activation1': 'swish', 'dropout1': 0.45494398736702957, 'finalact': 'relu', 'LRfactor': 0.3385966726258443, 'LRpatience': 20, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012.
[I 2023-05-04 21:40:25,939] Trial 1 finished with value: 0.22325070202350616 and parameters: {'n_layers': 3, 'n_units_l0': 210, 'activation0': 'linear', 'dropout0': 0.08908484293328278, 'n_units_l1': 58, 'activation1': 'relu', 'dropout1': 0.3607393324724844, 'n_units_l2': 64, 'activation2': 'swish', 'dropout2': 0.050528786668444625, 'finalact': 'swish', 'LRfactor': 0.4261282763954562, 'LRpatience': 7, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012.
[I 2023-05-04 21:41:09,290] Trial 2 finished with value: 0.11625661700963974 and parameters: {'n_layers': 4, 'n_units_l0': 27, 'activation0': 'linear', 'dropout0': 0.32081556059981314, 'n_units_l1': 48, 'activation1': 'linear', 'dropout1': 0.4271050399523469, 'n_units_l2': 56, 'activation2': 'relu', 'dropout2': 0.1350346668384107, 'n_units_l3': 23, 'activation3': 'relu', 'dropout3': 0.07419599259274368, 'finalact': 'relu', 'LRfactor': 0.06626474452211745, 'LRpatience': 11, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012.
[I 2023-05-04 21:42:03,526] Trial 3 finished with value: 0.0024616126902401447 and parameters: {'n_layers': 2, 'n_units_l0': 119, 'activation0': 'linear', 'dropout0': 0.4666291475397445, 'n_units_l1': 321, 'activation1': 'linear', 'dropout1': 0.23297453653473893, 'finalact': 'linear', 'LRfactor': 0.3716491551697835, 'LRpatience': 16, 'optimizer': 'adam'}. Best is trial 3 with value: 0.0024616126902401447.
[I 2023-05-04 21:42:50,352] Trial 4 finished with value: 0.028451677411794662 and parameters: {'n_layers': 2, 'n_units_l0': 228, 'activation0': 'sigmoid', 'dropout0': 0.2936512952485221, 'n_units_l1': 83, 'activation1': 'swish', 'dropout1': 0.4866080184224619, 'finalact': 'relu', 'LRfactor': 0.21004610191077522, 'LRpatience': 13, 'optimizer': 'Adagrad'}. Best is trial 3 with value: 0.0024616126902401447.
[I 2023-05-04 21:43:56,081] Trial 5 finished with value: 0.002349897287786007 and parameters: {'n_layers': 4, 'n_units_l0': 278, 'activation0': 'linear', 'dropout0': 0.19204236108062905, 'n_units_l1': 23, 'activation1': 'sigmoid', 'dropout1': 0.4787275890234401, 'n_units_l2': 270, 'activation2': 'swish', 'dropout2': 0.25183272134384543, 'n_units_l3': 75, 'activation3': 'relu', 'dropout3': 0.07401573027113578, 'finalact': 'swish', 'LRfactor': 0.30819563778768044, 'LRpatience': 19, 'optimizer': 'adam'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:44:57,951] Trial 6 finished with value: 0.027446050196886063 and parameters: {'n_layers': 3, 'n_units_l0': 98, 'activation0': 'swish', 'dropout0': 0.11033841508715692, 'n_units_l1': 266, 'activation1': 'sigmoid', 'dropout1': 0.007128239202319531, 'n_units_l2': 233, 'activation2': 'sigmoid', 'dropout2': 0.18063155986861595, 'finalact': 'swish', 'LRfactor': 0.4353618787241985, 'LRpatience': 18, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:46:07,778] Trial 7 finished with value: 0.025309884920716286 and parameters: {'n_layers': 4, 'n_units_l0': 86, 'activation0': 'relu', 'dropout0': 0.13469944325185834, 'n_units_l1': 51, 'activation1': 'sigmoid', 'dropout1': 0.45407596417766094, 'n_units_l2': 233, 'activation2': 'relu', 'dropout2': 0.1939623726401456, 'n_units_l3': 137, 'activation3': 'swish', 'dropout3': 0.001329233805416219, 'finalact': 'swish', 'LRfactor': 0.39399612905476733, 'LRpatience': 17, 'optimizer': 'RMSprop'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:47:12,579] Trial 8 finished with value: 0.02543598599731922 and parameters: {'n_layers': 3, 'n_units_l0': 184, 'activation0': 'swish', 'dropout0': 0.03378915321380127, 'n_units_l1': 159, 'activation1': 'sigmoid', 'dropout1': 0.14886044336657522, 'n_units_l2': 107, 'activation2': 'linear', 'dropout2': 0.1958553785729048, 'finalact': 'sigmoid', 'LRfactor': 0.25626449401087686, 'LRpatience': 7, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:47:31,867] Trial 9 finished with value: 0.02460320107638836 and parameters: {'n_layers': 1, 'n_units_l0': 35, 'activation0': 'relu', 'dropout0': 0.22335975890434528, 'finalact': 'linear', 'LRfactor': 0.40034922645562904, 'LRpatience': 19, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:49:10,367] Trial 10 finished with value: 0.026280028745532036 and parameters: {'n_layers': 4, 'n_units_l0': 386, 'activation0': 'linear', 'dropout0': 0.1985833543391322, 'n_units_l1': 23, 'activation1': 'relu', 'dropout1': 0.3294185031066814, 'n_units_l2': 379, 'activation2': 'swish', 'dropout2': 0.39491531481846653, 'n_units_l3': 237, 'activation3': 'linear', 'dropout3': 0.41822073147160505, 'finalact': 'sigmoid', 'LRfactor': 0.4969425578807015, 'LRpatience': 13, 'optimizer': 'sgd'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:49:53,861] Trial 11 finished with value: 0.00242361961863935 and parameters: {'n_layers': 1, 'n_units_l0': 370, 'activation0': 'linear', 'dropout0': 0.4936815127364292, 'finalact': 'linear', 'LRfactor': 0.3025365406445617, 'LRpatience': 16, 'optimizer': 'adam'}. Best is trial 5 with value: 0.002349897287786007.
[I 2023-05-04 21:50:38,290] Trial 12 finished with value: 0.0022341052535921335 and parameters: {'n_layers': 1, 'n_units_l0': 385, 'activation0': 'linear', 'dropout0': 0.3748601955310189, 'finalact': 'linear', 'LRfactor': 0.2812289816819257, 'LRpatience': 15, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:51:15,565] Trial 13 finished with value: 0.0023731838446110487 and parameters: {'n_layers': 1, 'n_units_l0': 297, 'activation0': 'linear', 'dropout0': 0.3726751558901054, 'finalact': 'linear', 'LRfactor': 0.21513406150227118, 'LRpatience': 15, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:52:12,785] Trial 14 finished with value: 0.0030617346055805683 and parameters: {'n_layers': 3, 'n_units_l0': 390, 'activation0': 'linear', 'dropout0': 0.37645032484453356, 'n_units_l1': 24, 'activation1': 'sigmoid', 'dropout1': 0.3402940035489825, 'n_units_l2': 26, 'activation2': 'swish', 'dropout2': 0.3473589864184872, 'finalact': 'swish', 'LRfactor': 0.27683807897904333, 'LRpatience': 11, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:52:52,475] Trial 15 finished with value: 0.002801797352731228 and parameters: {'n_layers': 2, 'n_units_l0': 263, 'activation0': 'linear', 'dropout0': 0.25316446423339506, 'n_units_l1': 106, 'activation1': 'sigmoid', 'dropout1': 0.4997830055487583, 'finalact': 'linear', 'LRfactor': 0.21203124146168495, 'LRpatience': 20, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:53:17,713] Trial 16 finished with value: 0.02614479511976242 and parameters: {'n_layers': 1, 'n_units_l0': 162, 'activation0': 'sigmoid', 'dropout0': 0.19644192905300228, 'finalact': 'swish', 'LRfactor': 0.1433997174995662, 'LRpatience': 5, 'optimizer': 'sgd'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:54:12,643] Trial 17 finished with value: 0.025485746562480927 and parameters: {'n_layers': 4, 'n_units_l0': 259, 'activation0': 'relu', 'dropout0': 0.39136264370980867, 'n_units_l1': 34, 'activation1': 'linear', 'dropout1': 0.26290349477033526, 'n_units_l2': 145, 'activation2': 'linear', 'dropout2': 0.4920459106277286, 'n_units_l3': 61, 'activation3': 'sigmoid', 'dropout3': 0.20011530860455395, 'finalact': 'sigmoid', 'LRfactor': 0.3225318885145574, 'LRpatience': 14, 'optimizer': 'RMSprop'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:54:58,154] Trial 18 finished with value: 0.0024991645477712154 and parameters: {'n_layers': 2, 'n_units_l0': 150, 'activation0': 'swish', 'dropout0': 0.28992015279803035, 'n_units_l1': 81, 'activation1': 'relu', 'dropout1': 0.41806921128574387, 'finalact': 'linear', 'LRfactor': 0.3357577341446319, 'LRpatience': 18, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335.
[I 2023-05-04 21:56:28,899] Trial 19 finished with value: 0.0022323420271277428 and parameters: {'n_layers': 3, 'n_units_l0': 309, 'activation0': 'linear', 'dropout0': 0.16142889562535626, 'n_units_l1': 138, 'activation1': 'swish', 'dropout1': 0.4017422879433487, 'n_units_l2': 398, 'activation2': 'sigmoid', 'dropout2': 0.30006760453802533, 'finalact': 'swish', 'LRfactor': 0.26461943339709043, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 19 with value: 0.0022323420271277428.
[I 2023-05-04 21:58:12,820] Trial 20 finished with value: 0.002155124209821224 and parameters: {'n_layers': 3, 'n_units_l0': 197, 'activation0': 'linear', 'dropout0': 0.14939677163716047, 'n_units_l1': 193, 'activation1': 'swish', 'dropout1': 0.40035016781108546, 'n_units_l2': 376, 'activation2': 'sigmoid', 'dropout2': 0.29387234491389896, 'finalact': 'linear', 'LRfactor': 0.16202265241592126, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 21:59:53,432] Trial 21 finished with value: 0.002449318766593933 and parameters: {'n_layers': 3, 'n_units_l0': 323, 'activation0': 'linear', 'dropout0': 0.15681136752530977, 'n_units_l1': 194, 'activation1': 'swish', 'dropout1': 0.3908307192158927, 'n_units_l2': 353, 'activation2': 'sigmoid', 'dropout2': 0.292021819408258, 'finalact': 'linear', 'LRfactor': 0.1532165786804657, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:01:32,500] Trial 22 finished with value: 0.002280886285007 and parameters: {'n_layers': 3, 'n_units_l0': 221, 'activation0': 'linear', 'dropout0': 0.067689589578532, 'n_units_l1': 396, 'activation1': 'swish', 'dropout1': 0.3833348197002681, 'n_units_l2': 168, 'activation2': 'sigmoid', 'dropout2': 0.308862505529691, 'finalact': 'linear', 'LRfactor': 0.25650878087315787, 'LRpatience': 9, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:02:31,296] Trial 23 finished with value: 0.002222359413281083 and parameters: {'n_layers': 3, 'n_units_l0': 309, 'activation0': 'linear', 'dropout0': 0.15449851362758502, 'n_units_l1': 142, 'activation1': 'swish', 'dropout1': 0.3142397484419011, 'n_units_l2': 330, 'activation2': 'sigmoid', 'dropout2': 0.37650755745813574, 'finalact': 'linear', 'LRfactor': 0.006563801861941221, 'LRpatience': 8, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:03:01,315] Trial 24 finished with value: 0.025332503020763397 and parameters: {'n_layers': 3, 'n_units_l0': 188, 'activation0': 'linear', 'dropout0': 0.15160521525766119, 'n_units_l1': 156, 'activation1': 'swish', 'dropout1': 0.2961503996956677, 'n_units_l2': 371, 'activation2': 'sigmoid', 'dropout2': 0.3667782295941331, 'finalact': 'linear', 'LRfactor': 0.0012695372904482083, 'LRpatience': 8, 'optimizer': 'sgd'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:03:40,157] Trial 25 finished with value: 0.025236334651708603 and parameters: {'n_layers': 3, 'n_units_l0': 300, 'activation0': 'linear', 'dropout0': 0.052550274581807846, 'n_units_l1': 122, 'activation1': 'swish', 'dropout1': 0.4019088025292165, 'n_units_l2': 398, 'activation2': 'sigmoid', 'dropout2': 0.4161923253158957, 'finalact': 'swish', 'LRfactor': 0.09527728529910556, 'LRpatience': 5, 'optimizer': 'RMSprop'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:04:17,147] Trial 26 finished with value: 0.0021845053415745497 and parameters: {'n_layers': 3, 'n_units_l0': 237, 'activation0': 'relu', 'dropout0': 0.016016209903262885, 'n_units_l1': 218, 'activation1': 'swish', 'dropout1': 0.33563232437318946, 'n_units_l2': 261, 'activation2': 'sigmoid', 'dropout2': 0.29746006798122976, 'finalact': 'sigmoid', 'LRfactor': 0.02577697345885166, 'LRpatience': 11, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:04:39,006] Trial 27 finished with value: 0.0022640665993094444 and parameters: {'n_layers': 2, 'n_units_l0': 158, 'activation0': 'relu', 'dropout0': 0.024388311603342677, 'n_units_l1': 215, 'activation1': 'swish', 'dropout1': 0.3206358320397621, 'finalact': 'sigmoid', 'LRfactor': 0.00673317775445606, 'LRpatience': 12, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:05:14,122] Trial 28 finished with value: 0.0022593578323721886 and parameters: {'n_layers': 3, 'n_units_l0': 236, 'activation0': 'relu', 'dropout0': 0.02005084880934265, 'n_units_l1': 215, 'activation1': 'swish', 'dropout1': 0.3558095675348199, 'n_units_l2': 249, 'activation2': 'sigmoid', 'dropout2': 0.3320202293339337, 'finalact': 'sigmoid', 'LRfactor': 0.04551583786492822, 'LRpatience': 8, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
[I 2023-05-04 22:05:34,064] Trial 29 finished with value: 0.0027342387475073338 and parameters: {'n_layers': 2, 'n_units_l0': 137, 'activation0': 'sigmoid', 'dropout0': 0.004848985915667459, 'n_units_l1': 158, 'activation1': 'swish', 'dropout1': 0.2775840109641383, 'finalact': 'relu', 'LRfactor': 0.0352036875097686, 'LRpatience': 9, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
Number of finished trials: 30
Best trial:
  Value: 0.002155124209821224
In [27]:
# Dump the winning hyperparameters of the best trial
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")
  Params: 
    n_layers: 3
    n_units_l0: 197
    activation0: linear
    dropout0: 0.14939677163716047
    n_units_l1: 193
    activation1: swish
    dropout1: 0.40035016781108546
    n_units_l2: 376
    activation2: sigmoid
    dropout2: 0.29387234491389896
    finalact: linear
    LRfactor: 0.16202265241592126
    LRpatience: 10
    optimizer: adam
In [28]:
# Rebuild the Optuna best configuration (trial 20) and retrain it on the
# train split, validating on the val split, with early stopping.
model = Sequential()
# input_shape is only meaningful on the first layer; later layers infer
# their input from the previous layer's output (the originals' repeated
# input_shape arguments were ignored by Keras).
model.add(keras.layers.LSTM(197, input_shape=(1, data_train.shape[1] - 1),
                            return_sequences=True, activation=tf.keras.activations.linear))
model.add(keras.layers.Dropout(0.15))
model.add(keras.layers.LSTM(193, return_sequences=True, activation=tf.keras.activations.swish))
model.add(keras.layers.Dropout(0.40))
model.add(keras.layers.LSTM(376, return_sequences=True, activation=tf.keras.activations.sigmoid))
model.add(keras.layers.Dropout(0.29))
model.add(keras.layers.Dense(1, activation=tf.keras.activations.linear))
# NOTE(review): the tuning objective monitored 'val_loss' for ReduceLROnPlateau,
# but here it watches 'loss' — confirm which is intended.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.16, patience=10, min_lr=1e-05, verbose=0)
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10, verbose=0,
                               mode="auto", restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')
val_ds = (xval_n, yval_n)
history = model.fit(xtrain_n, ytrain_n, validation_data=val_ds, epochs=200,
                    verbose=0, callbacks=[early_stopping, reduce_lr])
In [29]:
# Plot train vs validation loss per epoch for the retrained best model
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch

fig = go.Figure(layout=go.Layout(
    title='Optuna Loss best model History',
    xaxis={'title': 'Epoch'},
    yaxis={'title': 'Loss'},
))
fig.add_trace(go.Scatter(x=hist['epoch'], y=hist['loss'],
                         mode='lines', name='Train_loss'))
fig.add_trace(go.Scatter(x=hist['epoch'], y=hist['val_loss'],
                         mode='lines', name='Val_loss'))
fig.show()
fig.write_html("Optuna Loss best model.html")
In [30]:
# Refit the same best configuration on train+val before scoring on test
model = Sequential()
# input_shape is only meaningful on the first layer; later layers infer
# their input from the previous layer's output.
model.add(keras.layers.LSTM(197, input_shape=(1, data_trainall.shape[1] - 1),
                            return_sequences=True, activation=tf.keras.activations.linear))
model.add(keras.layers.Dropout(0.15))
model.add(keras.layers.LSTM(193, return_sequences=True, activation=tf.keras.activations.swish))
model.add(keras.layers.Dropout(0.40))
model.add(keras.layers.LSTM(376, return_sequences=True, activation=tf.keras.activations.sigmoid))
model.add(keras.layers.Dropout(0.29))
model.add(keras.layers.Dense(1, activation=tf.keras.activations.linear))
# No validation split on this final fit, so both callbacks monitor training loss
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.16, patience=10, min_lr=1e-05, verbose=0)
early_stopping = EarlyStopping(monitor="loss", min_delta=0, patience=10, verbose=0,
                               mode="auto", restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')
history = model.fit(xtrainall_n, ytrainall_n, epochs=200, verbose=1,
                    callbacks=[early_stopping, reduce_lr])
Epoch 1/200
79/79 [==============================] - 4s 11ms/step - loss: 0.1819 - lr: 0.0010
Epoch 2/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0784 - lr: 0.0010
Epoch 3/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0775 - lr: 0.0010
Epoch 4/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0680 - lr: 0.0010
Epoch 5/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0331 - lr: 0.0010
Epoch 6/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0140 - lr: 0.0010
Epoch 7/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0107 - lr: 0.0010
Epoch 8/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0100 - lr: 0.0010
Epoch 9/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0091 - lr: 0.0010
Epoch 10/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0086 - lr: 0.0010
Epoch 11/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0080 - lr: 0.0010
Epoch 12/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0078 - lr: 0.0010
Epoch 13/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0077 - lr: 0.0010
Epoch 14/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0074 - lr: 0.0010
Epoch 15/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0072 - lr: 0.0010
Epoch 16/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0071 - lr: 0.0010
Epoch 17/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0070 - lr: 0.0010
Epoch 18/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0070 - lr: 0.0010
Epoch 19/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0068 - lr: 0.0010
Epoch 20/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0072 - lr: 0.0010
Epoch 21/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0069 - lr: 0.0010
Epoch 22/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0069 - lr: 0.0010
Epoch 23/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010
Epoch 24/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0065 - lr: 0.0010
Epoch 25/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0067 - lr: 0.0010
Epoch 26/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0064 - lr: 0.0010
Epoch 27/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010
Epoch 28/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010
Epoch 29/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010
Epoch 30/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010
Epoch 31/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010
Epoch 32/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0063 - lr: 0.0010
Epoch 33/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0063 - lr: 0.0010
Epoch 34/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010
Epoch 35/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0064 - lr: 0.0010
Epoch 36/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0062 - lr: 0.0010
Epoch 37/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0058 - lr: 0.0010
Epoch 38/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0061 - lr: 0.0010
Epoch 39/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010
Epoch 40/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0063 - lr: 0.0010
Epoch 41/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0059 - lr: 0.0010
Epoch 42/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0060 - lr: 0.0010
Epoch 43/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0061 - lr: 0.0010
Epoch 44/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010
Epoch 45/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0061 - lr: 0.0010
Epoch 46/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0058 - lr: 0.0010
Epoch 47/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0059 - lr: 0.0010
Epoch 48/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 49/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 50/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0056 - lr: 1.6000e-04
Epoch 51/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04
Epoch 52/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0056 - lr: 1.6000e-04
Epoch 53/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04
Epoch 54/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 55/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 56/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0056 - lr: 1.6000e-04
Epoch 57/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 58/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04
Epoch 59/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.6000e-04
Epoch 60/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 61/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04
Epoch 62/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05
Epoch 63/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05
Epoch 64/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05
Epoch 65/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 2.5600e-05
Epoch 66/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05
Epoch 67/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05
Epoch 68/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 2.5600e-05
Epoch 69/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05
Epoch 70/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05
Epoch 71/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05
Epoch 72/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05
Epoch 73/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 2.5600e-05
Epoch 74/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.0000e-05
Epoch 75/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0052 - lr: 1.0000e-05
Epoch 76/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05
Epoch 77/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05
Epoch 78/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.0000e-05
Epoch 79/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.0000e-05
Epoch 80/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05
Epoch 81/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0052 - lr: 1.0000e-05
Epoch 82/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.0000e-05
Epoch 83/200
79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.0000e-05
Epoch 84/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.0000e-05
Epoch 85/200
79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 1.0000e-05
In [31]:
# Model inference on the scaled test inputs (indexed as [i][0][0] below)
prediction=model.predict(xtest_n)
1/1 [==============================] - 0s 352ms/step
In [32]:
# Collapse the per-sample (1, 1) prediction blocks into a flat list of scalars
final_values = [row[0][0] for row in prediction]
In [33]:
# All-zero frame shaped like data_test; only GSPC is filled before unscaling
df_final = pd.DataFrame(0, index=np.arange(len(data_test)), columns=data_test.columns)
In [34]:
# Place predictions in the GSPC column; other columns stay 0 for the inverse transform
df_final['GSPC']=final_values
In [35]:
# Map scaled values back to the original return scale.
# NOTE(review): this rebinds df_final from a DataFrame to a numpy array.
df_final = scaler_all.inverse_transform(df_final)
In [36]:
# Column 0 of the inverse-transformed array is GSPC back on its original scale
final_values_rescaled = [row[0] for row in df_final]
In [37]:
# Plot train/val/test actuals against model predictions on the test window.
# (traindates must stay row-aligned with data_train after outlier filtering.)
fig = go.Figure(layout=go.Layout(
    title='S&P 500 Forecast',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Close'},
))
fig.add_trace(go.Scatter(x=pd.to_datetime(traindates), y=data_train.iloc[:, 0],
                         mode='lines', name='Datatrain'))
fig.add_trace(go.Scatter(x=pd.to_datetime(testdates), y=data_test.iloc[:, 0],
                         mode='lines', name='Datatest'))
fig.add_trace(go.Scatter(x=pd.to_datetime(valdates), y=data_val.iloc[:, 0],
                         mode='lines', name='Dataval'))
fig.add_trace(go.Scatter(x=pd.to_datetime(testdates), y=final_values_rescaled,
                         mode='lines', name='Prediction'))
fig.show()
fig.write_html("SP500.html")
In [38]:
from sklearn.metrics import r2_score,mean_squared_error
import math
# Coefficient of determination between actual and predicted GSPC returns
r2_score(data_test.iloc[:,0], final_values_rescaled)
Out[38]:
0.8381477171656116
In [39]:
# RMSE in the same units as the (smoothed) returns
mse = mean_squared_error(data_test.iloc[:,0], final_values_rescaled)
rmse = math.sqrt(mse)
rmse
Out[39]:
0.41265596520482867
In [40]:
# Actual (x) vs predicted (y) returns; points near the diagonal indicate good fit
plt.plot(data_test.iloc[:,0], final_values_rescaled, 'ro')
plt.show()
In [41]:
# Persist architecture as JSON and weights as HDF5 for later reuse
model_json = model.to_json()
with open("modelRendSP500.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("modelRendSP500.h5")
In [43]:
import joblib
# Save the train+val scaler so future predictions can be inverse-transformed
joblib.dump(scaler_all, 'scalerRendSP500.gz')
Out[43]:
['scalerRendSP500.gz']
In [ ]: